---
title: "prep_data"
output: html_document
editor_options: 
  chunk_output_type: console
---

```{r}
# References:
#   https://rdatatable.gitlab.io/data.table/articles/datatable-intro.html
#   https://richarddmorey.github.io/BayesFactor/
#
# The workspace is deliberately not cleared with rm(list = ls()): run
# interactively, that call deletes the caller's objects while leaving attached
# packages and options untouched, so it does not provide a clean state.
# Restart the R session before running this file from the top instead.
library(data.table)
library(tidyverse)
library(psych)
# source("creds.R")
```

```{r}
# Load the two exports of the survey: d0 is the primary table, d0_numeric is
# the "_numeric" export used below to overwrite selected columns of d0.
d0 <- fread("../data/raw/TikTok Misinfo - (F21) - Prolific_August 27, 2022_11.57.csv")
dim(d0)
glimpse(d0[-c(1, 2)])  # preview without the first two rows

d0_numeric <- fread("../data/raw/TikTok Misinfo - (F21) - Prolific_August 28, 2022_23.08_numeric.csv")
dim(d0_numeric)

# Columns of d0 that are replaced by their counterpart in d0_numeric.
numeric_cols <- c(
  "Aspartame", "Asymptomatic", "Rust", "Herd", "Ivermectin", "Brain",
  "TikTok_Use", "TikTok_RelativeUse",
  "AOT_1", "AOT_2", "AOT_3", "AOT_4", "AOT_5", "AOT_6",
  "CMT_1", "CMT_2", "CMT_3", "CMT_4", "CMT_5", "CMT_6",
  "PIT_1", "PIT_2", "PIT_3", "PIT_4", "PIT_5", "PIT_6",
  "PET_1", "PET_2", "PET_3", "PET_4", "PET_5", "PET_6",
  "Education", "Social_Conserv", "Economic_Conserv", "DemRep_C"
)

# A column missing from d0_numeric would make `d0[[col]] <- NULL` silently
# delete that column from d0, so fail loudly instead.
missing_cols <- setdiff(numeric_cols, names(d0_numeric))
if (length(missing_cols) > 0) {
  stop(
    "Columns missing from the numeric export: ",
    paste(missing_cols, collapse = ", "),
    call. = FALSE
  )
}

# Align the two exports on ResponseId instead of trusting row position: the
# files were exported at different times, so row order/count is not guaranteed.
if (anyDuplicated(d0_numeric$ResponseId) > 0) {
  stop("Duplicated ResponseId values in the numeric export.", call. = FALSE)
}
row_in_numeric <- match(d0$ResponseId, d0_numeric$ResponseId)
sum(!is.na(row_in_numeric)) # should be 1370, same as number of records in the d0
if (anyNA(row_in_numeric)) {
  stop(
    sum(is.na(row_in_numeric)),
    " ResponseId value(s) in d0 have no match in the numeric export.",
    call. = FALSE
  )
}

# replace all the numeric columns
for (col in numeric_cols) {
  d0[[col]] <- d0_numeric[[col]][row_in_numeric]
}

```

```{r}
# Drop the first two rows, then round-trip the table through a CSV
# (presumably so fread() re-infers the column types without those rows).
d1 <- tail(d0, -2)
glimpse(d1)
fwrite(d1, "../data/raw/tiktok.csv")
d1 <- fread("../data/raw/tiktok.csv")
glimpse(d1)

# Exclusion steps, one intermediate table per criterion.

# 1. keep only participants who completed the whole survey
d2 <- d1[Progress == 100 & Finished]

# 2. drop records generated by the survey preview
d3 <- d2[Status != "Survey Preview"]

# 3. keep only participants who gave consent
d4 <- d3[Consent == "Agree"]

# 4. keep only participants who passed the first screener
d5 <- d4[screener_1 == "Red,Green"]

# 5. keep participants whose volume was on, either initially or after the
#    follow-up volume check
d6 <- d5[Vol == "Yes" | (Vol == "No" & VolCheck == "My volume is on now")]
d6[, table(Vol)]
d6[, table(VolCheck)]
```


```{r attention screeners}
# Build per-participant attention flags from screeners 2 and 3.

# Screener 2 (free text): scored 1 when the lower-cased answer contains "read".
d6[, screener_2_8_TEXT := tolower(screener_2_8_TEXT)]
d6[, table(screener_2_8_TEXT)]
d6[, screener2acc := ifelse(grepl("read", screener_2_8_TEXT), 1, 0)]
d6[, table(screener2acc)]

# Screener 3: scored 1 only for the exact answer combination below, 0 otherwise
# (missing answers also score 0).
d6[, table(screener_3)]
d6[, screener3acc := as.numeric(screener_3 %in% "Very interested,Extremely interested")]
d6[, table(screener3acc)]

# Proportion of the two screeners passed: 0, 0.5 or 1.
d6[, screen_acc := (screener2acc + screener3acc) / 2]
glimpse(d6)
```

```{r wide to long}
# Clean up column names: first strip every space ...
names(d6) <- gsub(" ", "", names(d6), fixed = TRUE)
glimpse(d6)

# ... then turn "RT_" into "RT" so names such as T1_Brain_RTClickCount split
# into three "_"-separated parts further down.
names(d6) <- gsub("RT_", "RT", names(d6), fixed = TRUE)
glimpse(d6)

# Store integer columns as doubles.
d6 <- mutate(d6, across(where(is.integer), as.numeric))

# Reshape the per-video columns to long format: one row per participant x column.
d66 <- d6 |>
  select(ResponseId, F1_Asp_1:T1_Brain_RTClickCount) |>
  melt(id.vars = "ResponseId")
glimpse(d66)
# Coerce to numeric and discard rows whose value is missing or non-numeric.
d66[, value := as.numeric(value)]
d66 <- d66[!is.na(value)]

# Split the original column name into its three components.
d67 <- separate(d66, variable, into = c("videotype", "topic", "responsetype"))
distinct(d67[, .(videotype, topic, responsetype)])
distinct(d67[, .(videotype)])
distinct(d67[, .(topic)])
distinct(d67[, .(responsetype)])

# Recode the numbered rating items; every other response type keeps its name
# and is only lower-cased.
d67[
  responsetype %in% c("1", "2", "3"),
  responsetype := unname(
    c("1" = "accuracy", "2" = "reliable", "3" = "unbiased")[responsetype]
  )
]
d67[, responsetype := tolower(responsetype)]

# Recode the video-type prefixes to descriptive labels.
d67[
  videotype %in% c("F1", "F2", "C", "T1"),
  videotype := unname(
    c(F1 = "false1", F2 = "false2", C = "correction", T1 = "true1")[videotype]
  )
]

# Back to one row per participant x video, one column per response type.
d68 <- dcast(d67, ResponseId + videotype + topic ~ responsetype, value.var = "value")
d68[, topic := tolower(topic)]
# Composite rating: mean of the three rating items, ignoring missing ones.
d68[, videorating := rowMeans(.SD, na.rm = TRUE),
    .SDcols = c("accuracy", "reliable", "unbiased")]
summary(d68$videorating)

# Veracity label: "false" for false2, "true" for true1, "none" for all others.
d68[, table(videotype)]
d68[, veracity := "none"]
d68[videotype == "false2", veracity := "false"]
d68[videotype == "true1", veracity := "true"]
distinct(d68[, .(videotype, veracity)])


```





```{r ctsq}
glimpse(d6)

# Cronbach's alpha for each six-item block (AOT, CMT, PIT, PET), restricted to
# participants who passed both attention screeners (screen_acc == 1).
attentive <- d6[screen_acc == 1]
psych::alpha(select(attentive, AOT_1:AOT_6))
psych::alpha(select(attentive, CMT_1:CMT_6))
psych::alpha(select(attentive, PIT_1:PIT_6))
psych::alpha(select(attentive, PET_1:PET_6))

# Reliability of the three video rating items.
video_items <- select(d68, accuracy, reliable, unbiased)
psych::alpha(video_items)
# `performance` is not attached in the setup chunk; treat it as optional so a
# missing install does not abort the chunk before d7/d8 are built.
if (requireNamespace("performance", quietly = TRUE)) {
  print(performance::cronbachs_alpha(video_items))
} else {
  message("Package 'performance' is not installed; skipping cronbachs_alpha().")
}
table(d6$TikTok)
table(d6$TikTok_Use)
# Pairwise correlations between the three rating items.
cor.test(d68$accuracy, d68$reliable)
cor.test(d68$accuracy, d68$unbiased)
cor.test(d68$unbiased, d68$reliable)

# Subscale scores: mean of each participant's items per subscale (long format).
d7 <- select(d6, ResponseId, AOT_1:PET_6) |>
  melt(id.vars = "ResponseId") |>
  separate(variable, c("subscale", "itemno")) |>
  group_by(ResponseId, subscale) |>
  # .groups = "drop" returns an ungrouped result and silences the regrouping
  # message that summarize() prints by default.
  summarize(value = mean(value, na.rm = TRUE), .groups = "drop") |>
  mutate(subscale = tolower(subscale)) |>
  data.table()

# One row per participant, one column per subscale.
d8 <- dcast(d7, ResponseId ~ subscale, value.var = "value")
```


```{r select covariates and merge}
glimpse(d6)

d6[, table(Condition)]
# Re-register d6 as a data.table before using `:=` (presumably needed because
# d6 went through dplyr::mutate_if() in an earlier chunk — confirm).
setDT(d6)
# Label the numeric condition codes; codes other than 1-3 stay NA.
d6[Condition ==  1, condition := "debunk"]
d6[Condition ==  2, condition := "misinfo-only"]
d6[Condition ==  3, condition := "correction-only"]
d6[, .(condition, Condition)] |> distinct()

# Label the counterbalancing order; codes other than 1-2 stay NA.
d6[, table(counterbalance)]
d6[counterbalance ==  1, counterbalance2 := "false-first"]
d6[counterbalance ==  2, counterbalance2 := "true-first"]
d6[, .(counterbalance, counterbalance2)] |> distinct()

# Participant-level covariates (one row per participant).
dcov <- select(
  d6,
  ResponseId,
  age = Age,
  demrepc = DemRep_C,
  screener2acc, screener3acc, screen_acc,
  condition,
  counterbalance = counterbalance2,
  Education,
  Aspartame:TikTok_RelativeUse
)
dcov


# Standardise names: lower case, no underscores.
names(dcov) <- tolower(names(dcov))
names(dcov) <- gsub("_", "", names(dcov))
glimpse(dcov)

names(d8) <- tolower(names(d8))

# Attach the subscale means. The key is spelled out so the join cannot
# silently pick up any other column name the two tables happen to share.
dcov2 <- left_join(dcov, d8, by = "responseid") |> data.table()
dcov2
```

```{r join long and wide}
# Attach the participant-level covariates to the long per-video table.
names(d68) <- tolower(names(d68))
# Explicit key: a natural join would also match on any other column name that
# d68 and dcov2 happened to share.
dfinal <- left_join(d68, dcov2, by = "responseid") |> data.table()
dfinal
dfinal <- dfinal[order(responseid, videotype)]
# Sanity check: distribution of the number of rows per participant.
dfinal[, .N, .(responseid)][, table(N)]

fwrite(dfinal, "../data/clean/data-long.csv")
# NOTE(review): this writes dcov, which does not contain the subscale means
# merged into dcov2 — confirm that dcov (not dcov2) is the intended output.
fwrite(dcov, "../data/clean/data-cov.csv")
```

```{r}

```

